{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [],
   "source": [
    "# work in progress"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# NOTE\n",
    "# \n",
    "# could it be a confound if smaller pain reductions are not detected?\n",
    "# or if big pain reductions are quite rare (3 per one trial over 4 min, as of now)\n",
    "# -> should we only do \"big\" pain reductions?"
   ]
  },
  {
   "attachments": {},
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Estimation of data set size"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [],
   "source": [
    "from prettytable import PrettyTable\n",
    "import numpy as np"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Note: Samples are defined as sections with big temperature reductions.\n"
     ]
    }
   ],
   "source": [
    "print(\"Note: Samples are defined as sections with big temperature reductions.\")\n",
    "\n",
    "def _holdout_sizes(size, val, test):\n",
    "    \"\"\"\n",
    "    Returns the number of subjects/samples for training, validation, and test sets.\n",
    "    \n",
    "    Parameters:\n",
    "    - size: Total number of subjects/samples\n",
    "    - val: Proportion of validation set\n",
    "    - test: Proportion of test set\n",
    "    \n",
    "    Returns:\n",
    "    - List of sizes [train, val, test]\n",
    "    \"\"\"\n",
    "    train = 1 - val - test\n",
    "    return [int(size * train), int(size * val), int(size * test)]\n",
    "\n",
    "\n",
    "def _print_info(n_subjects_aim, n_samples_per_subject, n_subjects_holdout):\n",
    "    \"\"\"\n",
    "    Prints the details of the dataset.\n",
    "    \n",
    "    Parameters:\n",
    "    - n_subjects_aim: Number of subjects aimed for\n",
    "    - n_samples_per_subject: Number of samples per subject\n",
    "    - n_subjects_holdout: List of sizes [train, val, test] for subjects\n",
    "    \"\"\"\n",
    "    table = PrettyTable()\n",
    "    table.field_names = [\"Info\", \"Value\"]\n",
    "    table.align[\"Info\"] = \"l\"\n",
    "    table.align[\"Value\"] = \"r\"\n",
    "\n",
    "    table.add_row([\"Number of subjects\", n_subjects_aim])\n",
    "    table.add_row([\"Number of samples\", n_subjects_aim * n_samples_per_subject])\n",
    "    table.add_row([\"Number of samples per subject\", n_samples_per_subject])\n",
    "    table.add_row([\"Number of subjects in holdout set (Train/Val/Test)\", f\"{n_subjects_holdout[0]}/{n_subjects_holdout[1]}/{n_subjects_holdout[2]}\"])\n",
    "    table.add_row([\"Number of samples in train set\", n_subjects_holdout[0] * n_samples_per_subject])\n",
    "    \n",
    "    print(table)\n",
    "\n",
    "\n",
    "def estimate_dataset_size(n_subjects_aim=None, n_samples_aim=None):\n",
    "    \"\"\"\n",
    "    Estimates the size of the dataset.\n",
    "    \n",
    "    Parameters:\n",
    "    - n_subjects_aim: Number of subjects aimed for\n",
    "    - n_samples_aim: Number of samples aimed for\n",
    "    \"\"\"\n",
    "    n_samples_per_trial = 3\n",
    "    n_trials = 3\n",
    "    n_skin_areas = 3\n",
    "    n_samples_per_subject = n_trials * n_skin_areas * n_samples_per_trial\n",
    "    time_per_trial_s = 250\n",
    "    time_per_session_min = n_skin_areas * n_trials * time_per_trial_s // 60\n",
    "    \n",
    "    if n_subjects_aim is None and n_samples_aim is None:\n",
    "        raise ValueError(\"Either n_subjects_aim or n_samples_aim must be provided.\")\n",
    "    \n",
    "    if n_subjects_aim:\n",
    "        print(\"Estimate dataset size via n_subjects_aim:\\n\")\n",
    "        n_samples_aim = n_subjects_aim * n_samples_per_subject\n",
    "    else:\n",
    "        print(\"Estimate dataset size via n_samples_aim:\\n\")\n",
    "        n_subjects_aim = int(np.ceil(n_samples_aim / n_samples_per_subject))\n",
    "        \n",
    "    print(f\"Stimulation time per session: {time_per_session_min} min\")\n",
    "    print(f\"Stimulation time per trial: {time_per_trial_s} s\\n\")\n",
    "    print(f\"Number of samples per subject: {n_samples_per_subject}\")\n",
    "\n",
    "    n_subjects_holdout = _holdout_sizes(n_subjects_aim, 0.2, 0.2)\n",
    "    _print_info(n_subjects_aim, n_samples_per_subject, n_subjects_holdout)\n",
    "    \n",
    "    info_dict = {\n",
    "        'time_per_session_min': time_per_session_min,\n",
    "        'time_per_trial_s': time_per_trial_s,\n",
    "        'n_samples_per_subject': n_samples_per_subject,\n",
    "        'n_subjects_aim': n_subjects_aim,\n",
    "        'n_samples_aim': n_samples_aim,\n",
    "        'n_subjects_holdout': n_subjects_holdout,\n",
    "        'n_samples_in_train_set': n_subjects_holdout[0] * n_samples_per_subject\n",
    "    }\n",
    "    \n",
    "    return info_dict"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Estimation of dataset size"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### via n_subjects_aim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimate dataset size via n_subjects_aim:\n",
      "\n",
      "Stimulation time per session: 37 min\n",
      "Stimulation time per trial: 250 s\n",
      "\n",
      "Number of samples per subject: 27\n",
      "+----------------------------------------------------+----------+\n",
      "| Info                                               |    Value |\n",
      "+----------------------------------------------------+----------+\n",
      "| Number of subjects                                 |       50 |\n",
      "| Number of samples                                  |     1350 |\n",
      "| Number of samples per subject                      |       27 |\n",
      "| Number of subjects in holdout set (Train/Val/Test) | 30/10/10 |\n",
      "| Number of samples in train set                     |      810 |\n",
      "+----------------------------------------------------+----------+\n"
     ]
    }
   ],
   "source": [
    "n_subjects_aim = 50\n",
    "info = estimate_dataset_size(n_subjects_aim = n_subjects_aim)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### via n_samples_aim"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Estimate dataset size via n_samples_aim:\n",
      "\n",
      "Stimulation time per session: 37 min\n",
      "Stimulation time per trial: 250 s\n",
      "\n",
      "Number of samples per subject: 27\n",
      "+----------------------------------------------------+----------+\n",
      "| Info                                               |    Value |\n",
      "+----------------------------------------------------+----------+\n",
      "| Number of subjects                                 |       67 |\n",
      "| Number of samples                                  |     1809 |\n",
      "| Number of samples per subject                      |       27 |\n",
      "| Number of subjects in holdout set (Train/Val/Test) | 40/13/13 |\n",
      "| Number of samples in train set                     |     1080 |\n",
      "+----------------------------------------------------+----------+\n"
     ]
    }
   ],
   "source": [
    "n_samples_aim = 1800\n",
    "info = estimate_dataset_size(n_samples_aim = n_samples_aim)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Estimation of work hours"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Time per subject: 97 min\n",
      "Total work hours for both experiments: 216 h\n",
      "Total work weeks (15 h) for both experiments: 14.4 weeks\n"
     ]
    }
   ],
   "source": [
    "setting_everything_up_min = 60\n",
    "\n",
    "time_per_subject_min = info['time_per_session_min'] + setting_everything_up_min\n",
    "time_per_experiment_min = time_per_subject_min * info['n_subjects_aim']\n",
    "time_per_experiment_h = time_per_experiment_min // 60\n",
    "n_experiments = 2\n",
    "time_experiments_h = time_per_experiment_h * n_experiments\n",
    "work_hours_per_week = 15\n",
    "time_experiments_weeks = time_experiments_h / work_hours_per_week\n",
    "\n",
    "print(f\"Time per subject: {time_per_subject_min} min\")\n",
    "print(f\"Total work hours for both experiments: {time_experiments_h} h\")\n",
    "print(f\"Total work weeks ({work_hours_per_week} h) for both experiments: {time_experiments_weeks:.1f} weeks\")"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "learning",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  },
  "orig_nbformat": 4,
  "vscode": {
   "interpreter": {
    "hash": "9b99f9135959a9658cb812fe0ba5c4f69cd3d89b5662a926a2ace19e3111ee69"
   }
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
